Read in the data

library(scran)
library(scater)
library(ggplot2)
library(DropletUtils)
library(openxlsx)
library(Rtsne)
library(stats)
library(knitr)
source("~/Dropbox/Postdoc/git/BE2020/Analysis/Functions/auxiliary.R")

Batch correction across tissues

I use data without patient 12 that might have gastric metaplasia samples.

sce.list <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/All_sce_filtered_high_quality.rds")
tissue.list <- list()

for(i in c("NE", "NG", "BE", "NSCJ", "BSCJ", "ND", "SMG")){
  
  cur_sces <- sce.list[which(grepl(i, names(sce.list)))]
  
  cur_sces <- lapply(cur_sces, function(n){
    rowData(n) <- rowData(n)[,1:2]
    n
  })
  
  #  cur_sces <-lapply(cur_sces, function (x) {
  #   if(colnames(rowData(x))[3] == "Type") {
  #     colnames(rowData(x))[3]<-"is_feature_control"
  #   } else {
  #     colnames(rowData(x))[3]<-colnames(rowData(x))[3]
  #   }
  #   x
  # })
  
  # Order sce object by size
  cur_sces <- cur_sces[order(unlist(lapply(cur_sces, ncol)), decreasing = TRUE)]
  
  
  
  # Combine sce objects
  tissue_sce <- do.call("cbind", cur_sces)
  qclusters <- quickCluster(tissue_sce)
  tissue_sce <- computeSumFactors(tissue_sce, clusters=qclusters)
  tissue_sce <- logNormCounts(tissue_sce)
  
  # Batch correction
  corrected <- batch.correction(cur_sces)
  
  # Save batch corrected counts in metdata
  metadata(tissue_sce)$corrected <- corrected
  
  # Compute tsne on corrected counts
  set.seed(1234)
  tsne <- Rtsne(t(corrected), pca = FALSE)
  
  # Store tsne in slot
  reducedDims(tissue_sce)$TSNE <- tsne$Y[,1:2]
  
  # Clustering on corrected data
  #dist.all <- as.dist(sqrt((1 - cor(as.matrix(corrected), 
  #                                    method = "spearman"))/2))
  #dendro <- hclust(dist.all, method = "ward.D2")
  #cluster <- as.character(cutreeDynamic(dendro = dendro, distM = as.matrix(dist.all), 
  #                      minClusterSize = 10, deepSplit = 0))
  if(i == "SMG") {
    g <- buildSNNGraph(corrected, k = 4, d = NA) # Use of k = 4 allows for capture of ductal cells as a seperate cluster in SMG samples. 
    cluster <- igraph::cluster_louvain(g)$membership
  } else {
    g <- buildSNNGraph(corrected, k = 5, d = NA) # A good balance needed to separate the samples.
    cluster <- igraph::cluster_louvain(g)$membership
  }

  # Save clustering in new slot
  colData(tissue_sce)$Tissue_cluster <- cluster
  
  # Visualize clustering
  cur_p <- ggplot(data.frame(tSNE1 = reducedDims(tissue_sce)$TSNE[,1],
                             tSNE2 = reducedDims(tissue_sce)$TSNE[,2],
                             clusters = as.factor(colData(tissue_sce)$Tissue_cluster))) +
    geom_point(aes(tSNE1, tSNE2, colour = clusters))+ggtitle(i)
  print(cur_p)
  
  
  cur_p <- ggplot(data.frame(tSNE1 = reducedDims(tissue_sce)$TSNE[,1],
                             tSNE2 = reducedDims(tissue_sce)$TSNE[,2],
                             clusters = as.factor(colData(tissue_sce)$Patient))) +
    geom_point(aes(tSNE1, tSNE2, colour = clusters))+ggtitle(i)
  print(cur_p)
  
  # Visualize whether populations are patient specific 
  print(kable(table(cluster, colData(tissue_sce)$Patient),row.names = TRUE))
  cat("\n")
  
  
  # Perform differential expression
  markers <- findMarkers(tissue_sce, groups = cluster, block = colData(tissue_sce)$Patient)
  
  
  markers.spec <- lapply(markers, function(n){
    if(!is.na(n$Top[1]) & !is.nan(sum(as.matrix(n[1,3:ncol(n)])))){
      test_n <- !is.na(n[1,3:ncol(n)])[1,]
      cur_n <- n[n$FDR < 0.1 & apply(n[,3:ncol(n)], 1, function(x){sum(x[test_n] > 0)}) == sum(test_n),]
      if(nrow(cur_n) > 0){
        cur_n$GeneName <- rowData(tissue_sce)$Symbol[match(rownames(cur_n), rowData(tissue_sce)$ID)]
      }
    }
    else{
      cur_n <- NULL
    }
    cur_n
  })
  
  if(!dir.exists(paste("~/Dropbox/Postdoc/2019-12-29_BE2020/Results/Marker_genes/Tissues_filtered/", 
                       i, sep = ""))){
    dir.create(paste("~/Dropbox/Postdoc/2019-12-29_BE2020/Results/Marker_genes/Tissues_filtered/", 
                     i, sep = ""))
  }
  
  write.xlsx(markers.spec, paste("~/Dropbox/Postdoc/2019-12-29_BE2020/Results/Marker_genes/Tissues_filtered/", i, "/Marker_genes.xlsx", sep = ""))
  
  tissue.list[[i]] <- tissue_sce
}

Patient01 Patient02 Patient03 Patient05 Patient06 Patient07 Patient08 Patient09 Patient10
1 14 86 3 6 0 2 4 14 11
2 184 0 0 4 105 0 0 0 0
3 128 23 17 28 47 27 34 52 27
4 268 497 0 143 13 0 43 40 554
5 109 731 274 98 0 21 36 8 123
6 184 656 66 239 0 90 137 32 95
7 12 73 0 48 0 0 2 1 68
8 477 418 17 152 111 2 41 87 286
9 187 652 30 124 1 5 102 64 399
10 98 347 59 174 0 193 68 6 53
11 107 176 15 119 18 45 36 88 83
12 358 925 67 196 2 48 192 115 236
13 203 654 47 229 1 64 126 35 85
14 91 225 76 115 0 117 35 2 36
15 133 115 66 198 14 176 56 47 42
16 386 7 0 52 404 208 3 54 3
17 22 26 12 3 17 0 6 14 2

Patient01 Patient02 Patient03 Patient06 Patient07 Patient08 Patient09 Patient10 Patient13
1 3 44 49 0 1 0 0 4 233
2 3 20 2 2 7 2 2 1 2
3 26 145 50 11 46 7 28 18 86
4 0 0 0 0 0 0 0 7 7
5 14 90 71 5 28 10 23 8 36
6 0 4 2 0 5 3 1 1 23
7 22 90 8 6 7 4 5 6 93
8 39 106 33 16 43 12 15 15 99
9 0 10 0 3 50 4 16 1 65
10 0 28 0 1 17 3 14 1 28
11 4 63 20 7 64 4 30 12 11
12 2 3 2 0 13 2 0 4 0
13 0 3 2 0 18 2 0 7 0
14 0 46 5 0 0 0 0 2 69
15 3 9 8 0 3 0 2 2 1
16 0 1 9 2 0 0 1 0 0
17 32 199 73 23 81 7 37 16 72
18 2 6 7 0 21 6 7 0 2
19 6 14 11 3 92 14 27 18 18
20 1 1 0 0 7 4 0 2 1
21 4 162 39 1 52 35 0 9 20
22 3 3 3 0 37 12 1 3 0
23 25 233 110 14 7 4 8 2 32
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 16 and 4
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 16 and 10
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 20 and 16

Patient03 Patient07 Patient09
1 5 23 0
2 0 82 0
3 1 39 2
4 2 27 1
5 4 262 0
6 61 98 54
7 2 126 11
8 1 174 0
9 75 52 21
10 37 43 20
11 12 81 7
12 71 35 31
13 75 95 57
14 54 22 17
15 165 33 18
16 51 44 12
17 88 67 41
18 6 32 32
19 27 113 37

Patient01 Patient02 Patient04 Patient05 Patient06 Patient08 Patient10 Patient13
1 0 9 237 74 0 4 51 107
2 0 11 74 100 47 43 30 42
3 0 2 267 153 71 77 47 321
4 0 7 111 223 48 39 28 48
5 8 229 3 0 36 34 37 13
6 26 503 12 2 30 43 23 19
7 38 505 7 0 11 27 43 15
8 62 481 3 3 1 17 16 4
9 0 8 410 70 12 44 31 126
10 0 42 140 117 42 56 23 345
11 0 29 367 102 15 69 34 214
12 0 0 80 0 8 0 21 103
13 0 0 112 51 32 1 92 31
14 0 7 3 0 38 0 13 92
15 0 1 166 17 29 2 17 41
16 0 5 138 140 26 176 6 479
17 1 4 16 0 62 8 5 33
18 0 2 16 15 418 10 14 77
19 0 59 22 67 57 43 72 155
20 0 0 20 7 113 0 41 3
21 0 18 104 57 58 24 111 154
22 0 1 0 1 1 1 36 4
23 2 7 42 0 39 16 16 28
24 0 1 271 143 40 33 50 158
25 1 0 24 0 12 2 1 14
26 15 83 5 2 4 29 9 0

Patient03 Patient07 Patient09
1 63 25 39
2 6 3 0
3 135 16 47
4 68 3 14
5 108 44 17
6 234 2 3
7 130 59 11
8 26 122 5
9 5 115 21
10 51 58 26
11 30 79 30
12 27 71 20
13 1 5 1
14 88 32 33
15 3 13 6
16 24 7 39
17 9 18 6
18 50 24 34
19 18 23 130
20 11 27 2
21 4 18 45

Patient07 Patient08 Patient09 Patient12
1 0 7 0 7
2 1 4 1 7
3 0 6 0 4
4 68 9 34 211
5 15 14 35 116
6 25 75 53 157
7 32 16 19 64
8 18 53 27 64
9 75 139 62 159
10 17 28 14 48
11 51 4 17 50
12 71 112 39 111
13 5 0 0 2
14 30 11 11 22
15 5 3 13 17
16 51 12 22 138
17 24 101 87 145
18 22 155 21 178
19 50 50 77 141
20 10 7 13 12

Patient10 Patient11 Patient13
1 0 6 1
2 61 163 1
3 15 126 5
4 0 67 0
5 2 81 11
6 63 187 3
7 59 126 0
8 10 57 0
9 3 57 0
10 162 46 37
11 23 46 0
12 17 208 0
13 51 232 0
14 227 20 23
15 41 118 5
16 18 41 105
17 26 32 0
18 12 122 0
19 30 70 72
20 1 106 120
saveRDS(tissue.list, "~/Dropbox/Postdoc/2019-12-29_BE2020/Tissue_sce_filtered.rds")

End Matter

To finish get session info:

sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora 31 (Workstation Edition)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] destiny_3.0.1               edgeR_3.28.0               
##  [3] limma_3.42.2                dbscan_1.1-5               
##  [5] princurve_2.1.4             dynamicTreeCut_1.63-1      
##  [7] knitr_1.28                  Rtsne_0.15                 
##  [9] openxlsx_4.1.4              DropletUtils_1.6.1         
## [11] scater_1.14.6               ggplot2_3.2.1              
## [13] scran_1.14.6                SingleCellExperiment_1.8.0 
## [15] SummarizedExperiment_1.16.1 DelayedArray_0.12.2        
## [17] BiocParallel_1.20.1         matrixStats_0.55.0         
## [19] Biobase_2.46.0              GenomicRanges_1.38.0       
## [21] GenomeInfoDb_1.22.0         IRanges_2.20.2             
## [23] S4Vectors_0.24.3            BiocGenerics_0.32.0        
## 
## loaded via a namespace (and not attached):
##   [1] ggbeeswarm_0.6.0         colorspace_1.4-1         RcppEigen_0.3.3.7.0     
##   [4] class_7.3-15             rio_0.5.16               XVector_0.26.0          
##   [7] RcppHNSW_0.2.0           BiocNeighbors_1.4.1      proxy_0.4-23            
##  [10] farver_2.0.3             hexbin_1.28.1            RSpectra_0.16-0         
##  [13] ranger_0.12.1            codetools_0.2-16         R.methodsS3_1.8.0       
##  [16] robustbase_0.93-5        R.oo_1.23.0              HDF5Array_1.14.2        
##  [19] compiler_3.6.2           ggplot.multistats_1.0.0  dqrng_0.2.1             
##  [22] assertthat_0.2.1         Matrix_1.2-18            lazyeval_0.2.2          
##  [25] formatR_1.7              BiocSingular_1.2.1       htmltools_0.4.0         
##  [28] tools_3.6.2              rsvd_1.0.2               igraph_1.2.4.2          
##  [31] gtable_0.3.0             glue_1.3.1               GenomeInfoDbData_1.2.2  
##  [34] dplyr_0.8.4              ggthemes_4.2.0           Rcpp_1.0.3              
##  [37] carData_3.0-3            cellranger_1.1.0         vctrs_0.2.2             
##  [40] DelayedMatrixStats_1.8.0 lmtest_0.9-37            xfun_0.12               
##  [43] laeken_0.5.1             stringr_1.4.0            lifecycle_0.1.0         
##  [46] irlba_2.3.3              statmod_1.4.33           DEoptimR_1.0-8          
##  [49] zlibbioc_1.32.0          MASS_7.3-51.4            zoo_1.8-7               
##  [52] scales_1.1.0             VIM_5.1.0                pcaMethods_1.78.0       
##  [55] hms_0.5.3                rhdf5_2.30.1             yaml_2.2.1              
##  [58] curl_4.3                 gridExtra_2.3            stringi_1.4.5           
##  [61] highr_0.8                e1071_1.7-3              TTR_0.23-6              
##  [64] boot_1.3-23              zip_2.0.4                rlang_0.4.4             
##  [67] pkgconfig_2.0.3          bitops_1.0-6             evaluate_0.14           
##  [70] lattice_0.20-38          purrr_0.3.3              Rhdf5lib_1.8.0          
##  [73] labeling_0.3             tidyselect_1.0.0         magrittr_1.5            
##  [76] R6_2.4.1                 pillar_1.4.3             haven_2.2.0             
##  [79] foreign_0.8-72           withr_2.1.2              xts_0.12-0              
##  [82] scatterplot3d_0.3-41     abind_1.4-5              RCurl_1.98-1.1          
##  [85] sp_1.3-2                 nnet_7.3-12              tibble_2.1.3            
##  [88] batchelor_1.2.4          crayon_1.3.4             car_3.0-6               
##  [91] rmarkdown_2.1            viridis_0.5.1            locfit_1.5-9.1          
##  [94] grid_3.6.2               readxl_1.3.1             data.table_1.12.8       
##  [97] forcats_0.4.0            vcd_1.4-5                digest_0.6.24           
## [100] tidyr_1.0.2              R.utils_2.9.2            munsell_0.5.0           
## [103] beeswarm_0.2.3           viridisLite_0.3.0        smoother_1.1            
## [106] vipor_0.4.5